////////////////////////////////////////////////////////////////////////////
//
// Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
//
// Please refer to the NVIDIA end user license agreement (EULA) associated
// with this source code for terms and conditions that govern your use of
// this software. Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
//
////////////////////////////////////////////////////////////////////////////

/* Template project which demonstrates the basics of how to set up a project
* example application.
* Host code.
*/

// includes, system
//#include <stdlib.h>
//#include <stdio.h>
//#include <string.h>
//#include <math.h>
//
//// includes CUDA
//#include <cuda_runtime.h>
//
//// includes, project
//#include <helper_cuda.h>
//#include <helper_functions.h> // helper functions for SDK examples
//
//////////////////////////////////////////////////////////////////////////////////
//// declaration, forward
//void runTest(int argc, char **argv);
//
//extern "C"
//void computeGold(JINGDU *reference, JINGDU *idata, const unsigned int len);
//
//////////////////////////////////////////////////////////////////////////////////
////! Simple test kernel for device functionality
////! @param g_idata  input data in global memory
////! @param g_odata  output data in global memory
//////////////////////////////////////////////////////////////////////////////////
//__global__ void
//testKernel(JINGDU *g_idata, JINGDU *g_odata)
//{
//    // shared memory
//    // the size is determined by the host application
//    extern  __shared__  JINGDU sdata[];
//
//    // access thread id
//    const unsigned int tid = threadIdx.x;
//    // access number of threads in this block
//    const unsigned int num_threads = blockDim.x;
//
//    // read in input data from global memory
//    sdata[tid] = g_idata[tid];
//    __syncthreads();
//
//    // perform some computations
//    sdata[tid] = (JINGDU) num_threads * sdata[tid];
//    __syncthreads();
//
//    // write data to global memory
//    g_odata[tid] = sdata[tid];
//}

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
//int
//main(int argc, char **argv)
//{
//    runTest(argc, argv);
//}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
//void
//runTest(int argc, char **argv)
//{
//    bool bTestResult = true;
//
//    printf("%s Starting...\n\n", argv[0]);
//
//    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
//    int devID = findCudaDevice(argc, (const char **)argv);
//
//    StopWatchInterface *timer = 0;
//    sdkCreateTimer(&timer);
//    sdkStartTimer(&timer);
//
//    unsigned int num_threads = 32;
//    unsigned int mem_size = sizeof(JINGDU) * num_threads;
//
//    // allocate host memory
//    JINGDU *h_idata = (JINGDU *) malloc(mem_size);
//
//    // initialize the memory
//    for (unsigned int i = 0; i < num_threads; ++i)
//    {
//        h_idata[i] = (JINGDU) i;
//    }
//
//    // allocate device memory
//    JINGDU *d_idata;
//    checkCudaErrors(cudaMalloc((void **) &d_idata, mem_size));
//    // copy host memory to device
//    checkCudaErrors(cudaMemcpy(d_idata, h_idata, mem_size,
//                               cudaMemcpyHostToDevice));
//
//    // allocate device memory for result
//    JINGDU *d_odata;
//    checkCudaErrors(cudaMalloc((void **) &d_odata, mem_size));
//
//    // setup execution parameters
//    dim3  grid(1, 1, 1);
//    dim3  threads(num_threads, 1, 1);
//
//    // execute the kernel
//    testKernel<<< grid, threads, mem_size >>>(d_idata, d_odata);
//
//    // check if kernel execution generated an error
//    getLastCudaError("Kernel execution failed");
//
//    // allocate mem for the result on host side
//    JINGDU *h_odata = (JINGDU *) malloc(mem_size);
//    // copy result from device to host
//    checkCudaErrors(cudaMemcpy(h_odata, d_odata, sizeof(JINGDU) * num_threads,
//                               cudaMemcpyDeviceToHost));
//
//    sdkStopTimer(&timer);
//    printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
//    sdkDeleteTimer(&timer);
//
//    // compute reference solution
//    JINGDU *reference = (JINGDU *) malloc(mem_size);
//    computeGold(reference, h_idata, num_threads);
//
//    // check result
//    if (checkCmdLineFlag(argc, (const char **) argv, "regression"))
//    {
//        // write file for regression test
//        sdkWriteFile("./data/regression.dat", h_odata, num_threads, 0.0f, false);
//    }
//    else
//    {
//        // custom output handling when no regression test running
//        // in this case check if the result is equivalent to the expected solution
//        bTestResult = compareData(reference, h_odata, num_threads, 0.0f, 0.0f);
//    }
//
//    // cleanup memory
//    free(h_idata);
//    free(h_odata);
//    free(reference);
//    checkCudaErrors(cudaFree(d_idata));
//    checkCudaErrors(cudaFree(d_odata));
//
//    exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
//}
